This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

library(rjson)
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tokenizers)
library(stopwords)
library(stringr)

# Load the two scraped Indeed job-description dumps (one file per scrape date).
jobs_2020 <- fromJSON(file = "indeed_job_descs_2020_09_20.json")
jobs_2021 <- fromJSON(file = "indeed_job_descs_2021_01_25.json")

Getting the data into a workable DF

# Column layout shared by the seed row and every row appended below.
job_cols <- c("Job", "State", "Employment", "Description", "Year")

# Placeholder seed row that fixes the column layout. It carries no real data
# and is dropped later in the notebook with jobs_df[-1, ].
jobs_df <- data.frame("Job", "State", "Employment",
                      "description", 2020)
names(jobs_df) <- job_cols

# Collect one single-row data frame per job description and bind them all at
# once. Calling rbind() inside the loop copies the whole frame on every
# iteration, which is quadratic in the number of descriptions.
rows_2020 <- list()
for (i in seq_along(jobs_2020)) {
  # The first three request parameters fill the Job, State and Employment
  # columns; single-bracket indexing keeps each one as a length-1 list.
  params <- jobs_2020[[i]]$request_params
  descriptions <- jobs_2020[[i]]$job_descriptions

  # seq_along() skips entries with no descriptions; 1:length() would have
  # iterated over c(1, 0) for them.
  for (j in seq_along(descriptions)) {
    row <- as.data.frame(c(params[1], params[2], params[3],
                           descriptions[j], 2020))
    names(row) <- job_cols
    rows_2020[[length(rows_2020) + 1]] <- row
  }
}

jobs_df <- do.call(rbind, c(list(jobs_df), rows_2020))


head(jobs_df)
dim(jobs_df)
[1] 591   5
#unique(factor(jobs_df$State))

In the 2021 file, only the first four job entries contain any descriptions, so only those are processed below.

# Only the first 4 entries of the 2021 file are processed, because every job
# after the 4th one has no description.
n_jobs_with_descriptions <- 4
job_cols_2021 <- c("Job", "State", "Employment", "Description", "Year")

# Collect the new rows in a list and bind once at the end instead of calling
# rbind() on the growing frame for every description.
rows_2021 <- list()
for (i in seq_len(n_jobs_with_descriptions)) {
  # The first three request parameters fill the Job, State and Employment
  # columns; single-bracket indexing keeps each one as a length-1 list.
  params <- jobs_2021[[i]]$request_params
  descriptions <- jobs_2021[[i]]$job_descriptions

  #print(i)

  for (j in seq_along(descriptions)) {
    row <- as.data.frame(c(params[1], params[2], params[3],
                           descriptions[j], 2021))
    names(row) <- job_cols_2021
    rows_2021[[length(rows_2021) + 1]] <- row
    #print(j)
  }
}

jobs_df <- do.call(rbind, c(list(jobs_df), rows_2021))
[1] 1
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 2
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 3
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 4
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
#jobs_2021
head(jobs_df)
dim(jobs_df)
[1] 760   5
# Drop the first row: it is only the placeholder used to create the frame.
jobs_df <- jobs_df[-1, ]

# Swap "+" for "_" in the job names so they are easier to use after the split.
jobs_df[["Job"]] <- str_replace_all(jobs_df[["Job"]], "\\+", "_")

# Remove a source of odd tokens: several samples contain text like "a.T",
# where one sentence runs straight into the next, so every period becomes a
# space.
jobs_df[["Description"]] <- str_replace_all(jobs_df[["Description"]], "\\.", " ")

# Quick look at the data: number of descriptions per job. The large bottom
# margin leaves room for the rotated axis labels.
par(mar = c(11, 4, 4, 4))
job_counts <- table(jobs_df[["Job"]])
barplot(height = job_counts, las = 2, cex.names = .75)


table(jobs_df$Job)

         business_analyst            data_architect            data_scientist             deep_learning human_resource_specialist 
                       39                        43                        45                        42                         8 
machine_learning_engineer                 marketing            office_manager                 recruiter                researcher 
                       30                        71                        39                        82                        40 
                    sales site_reliability_engineer        software_developer              statistician             test_engineer 
                       79                        42                        41                        27                        44 
              ux_designer 
                       87 

You will need to install the `tokenizers` and `stopwords` packages before running the next chunks.

# Split the full table into a named list holding one data frame per job.
split_df <- split(jobs_df, jobs_df[["Job"]])

# Tokenize each job description into words and append per-word counts.
#
# Args:
#   df: data frame with a `Description` column, one job posting per row.
#
# Returns:
#   `df` with one extra column per distinct word found in any description
#   (English stop words removed). Each cell holds how often that word occurs
#   in the row's description, 0 when it does not occur.
tokenize.df <- function(df) {
  # Nothing to tokenize; 1:length() would have looped over c(1, 0) here.
  if (nrow(df) == 0) {
    return(df)
  }

  # Build the stop-word list once instead of once per description.
  stop_words <- stopwords::stopwords("en")

  # One table of word counts per description.
  # NOTE(review): a description that yields no tokens may contribute no row
  # to the bound frame, which would make bind_cols() fail — confirm whether
  # such rows can occur.
  token_counts <- lapply(df$Description, function(description) {
    table(tokenize_words(description, stopwords = stop_words))
  })

  # Bind all count tables in one call; growing the frame row by row inside a
  # loop re-copies it every iteration.
  temp <- as.data.frame(bind_rows(token_counts))

  # setting NA to 0 because it is easier to use in this case
  temp[is.na(temp)] <- 0

  bind_cols(df, temp)
}
# Tokenize each job description into 1- to 3-word n-grams and append
# per-n-gram counts.
#
# Args:
#   df: data frame with a `Description` column, one job posting per row.
#
# Returns:
#   `df` with one extra column per distinct n-gram found in any description
#   (English stop words removed). Each cell holds how often that n-gram
#   occurs in the row's description, 0 when it does not occur.
tokenize_ngrams.df <- function(df) {
  # Nothing to tokenize; 1:length() would have looped over c(1, 0) here.
  if (nrow(df) == 0) {
    return(df)
  }

  # Build the stop-word list once instead of once per description.
  stop_words <- stopwords::stopwords("en")

  # One table of n-gram counts per description.
  # NOTE(review): a description that yields no n-grams may contribute no row
  # to the bound frame, which would make bind_cols() fail — confirm whether
  # such rows can occur.
  token_counts <- lapply(df$Description, function(description) {
    table(tokenize_ngrams(description, n = 3, n_min = 1,
                          stopwords = stop_words))
  })

  # Bind all count tables in one call; growing the frame row by row inside a
  # loop re-copies it every iteration.
  temp <- as.data.frame(bind_rows(token_counts))

  # setting NA to 0 because it is easier to use in this case
  temp[is.na(temp)] <- 0

  bind_cols(df, temp)
}

This part takes some time


# Tokenize the descriptions of every job type in place. seq_along() makes the
# loop a no-op for an empty list, where 1:length() would iterate over c(1, 0).
for (i in seq_along(split_df)) {
  split_df[[i]] <- tokenize.df(split_df[[i]])
  # Progress indicator: this step is slow.
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16

This part is grabbing the totals of each column

# For every job type, total each token column and scale the totals by the
# mean number of counted tokens per description for that job type.
master_li <- list()
for (i in seq_along(split_df)) {
  job_df <- split_df[[i]]

  # Token counts start at column 6; the first five columns are Job, State,
  # Employment, Description and Year. drop = FALSE keeps a data frame even
  # when there is only a single token column.
  token_counts <- job_df[, 6:ncol(job_df), drop = FALSE]

  # Total occurrences of each token across all descriptions of this job.
  token_totals <- colSums(token_counts)

  # Normalization factor: mean number of counted tokens per description.
  mean_tokens_per_description <- mean(rowSums(token_counts))

  master_li[[names(split_df)[i]]] <- token_totals / mean_tokens_per_description
}

max(master_li$human_resource_specialist)
[1] 0.1767956
# Read a plain-text resume and count its word tokens.
#
# Args:
#   path: path to a text file containing the resume.
#
# Returns:
#   A one-row data frame with one column per distinct word in the resume
#   (English stop words removed); each cell is that word's count.
tokenize_resume <- function(path) {
  # Read every line and join them into one string.
  resume_lines <- readLines(path, warn = FALSE)
  resume_text <- str_c(resume_lines, collapse = " ")

  # Count each remaining word.
  token_counts <- table(tokenize_words(resume_text,
                                       stopwords = stopwords::stopwords("en")))

  # check.names = FALSE keeps the tokens as column names verbatim. The
  # default runs them through make.names(), turning e.g. "2020" into "X2020",
  # so those tokens would never line up with the job token columns.
  as.data.frame(as.list(token_counts), check.names = FALSE)
}
zac = tokenize_resume("test.txt")
zac
sarah = tokenize_resume("sarahresumetxt.txt")
sarah


zac = tokenize_resume("test.txt")
zac

bm = best_matches_overview(master_li, sarah)
print(sort(bm[,1], decreasing = T))
                marketing               ux_designer                 recruiter                     sales                researcher 
                25.437447                 23.099432                 23.007541                 20.967286                 14.029738 
           data_scientist             deep_learning          business_analyst             test_engineer        software_developer 
                13.443155                 13.202629                 13.018318                 12.981715                 12.911496 
           data_architect site_reliability_engineer            office_manager machine_learning_engineer              statistician 
                12.740836                 10.449200                 10.366505                  8.560117                  7.887578 
human_resource_specialist 
                 2.495229 
# Names of the three best-scoring job categories, as a one-column character
# matrix (one category per row).
top_scores <- sort(bm[, 1], decreasing = T)[1:3]
job_names <- matrix(names(top_scores), ncol = 1)

Since this is a human-in-the-loop algorithm, a person can then judge whether these categories seem like a good fit. We could simply take the top 3 categories and pull a few more example postings from each.

# Find, for each requested job category, the posting that best matches a
# resume.
#
# Args:
#   df: named list of data frames, one per job category. Token-count columns
#     start at column 6 and column 4 is returned as the posting text (the
#     Description column in the frames built in this notebook).
#   job_name: one-column character matrix of category names to search.
#   resume: one-row data frame of token counts for the resume.
#
# Returns:
#   A named list with, per category, column 4 of the best-scoring row. When
#   no row scores above 0, the first row is returned.
best_matches <- function(df, job_name, resume) {
  matches <- list()

  # outer loop over the overall job categories
  for (i in seq_len(nrow(job_name))) {
    category <- job_name[i, 1]
    print(category)

    # Look the category up by name once. The inner loop used to take its row
    # count from df[[i]] (the i-th category by position), which is generally
    # a different category from the one being scored.
    postings <- df[[category]]
    token_cols <- 6:ncol(postings)

    # best score seen so far and the row it belongs to
    score <- 0
    index <- 1

    # inner loop to find the best posting within the category
    for (j in seq_len(nrow(postings))) {
      # token counts of posting j, stacked on top of the resume's counts
      temp <- postings[j, token_cols]
      tester <- bind_rows(temp, resume)
      tester[is.na(tester)] <- 0

      # `multi` is defined outside this block and applied to each token
      # column. NOTE(review): presumably it multiplies the posting count by
      # the resume count — confirm.
      new_score <- sum(apply(tester, 2, multi), na.rm = TRUE)

      if (new_score > score) {
        index <- j
        score <- new_score
      }
    }

    # store the best-scoring posting under the category name
    matches[[category]] <- postings[index, 4]
  }

  matches
}


best = best_matches(split_df, job_names, sarah)
[1] "marketing"
[1] "ux_designer"
[1] "recruiter"
best
$marketing
[1] "QualificationsMaster's (Required)marketing and/or communications: 8 years (Required)Full Job DescriptionPosition SummaryReporting to the Dean, the Chief Marketing and Communications Officer sets strategy and directs execution for marketing, public relations, and communications goals for all programs and departments of Columbia Business School  The Chief Marketing and Communications Officer oversees the areas of Marketing, Communications and Public Relations, and is responsible for all internal and external communications for the School, including digital technology and social media as appropriate The candidate has responsibility for the School’s branding strategy and has oversight on implementing that brand identity throughout the School including on the website, print media, marketing collateral, and all communications to internal and external stakeholders  The Chief Marketing and Communications Officer will manage a team of officers of administration ResponsibilitiesThe Chief Marketing and Communications Officer will plan and direct a coordinated communications, public relations and marketing strategy in support of Columbia Business School priorities: bridging academic theory and real-world practice; providing a world-class education that supports a lifetime career of business leadership and entrepreneurial thinking; and supporting the School’s community of students, faculty, staff and alumni Working with the dean and School senior staff, the Chief Marketing and Communications Officer will drive efforts to continue to distinguish and enhance the brand of the institution through all communications to the public, and manage media, print and web communications; direct information through media organizations and constituencies of the School The Chief Marketing and Communications Officer will work closely with the Vice Dean of Research to create a strategy for faculty research promotion to ensure that both internal and external stakeholders are aware of faculty 
impact beyond what is taught in the classroom  This strategy must include the website, media outreach, School-sponsored conferences and events, marketing materials, internal magazines and postings, and social media The Chief Marketing and Communications Officer will maintain the social media presence for the dean in LinkedIn, Instagram, Twitter, and on the School’s website The Chief Marketing and Communications Officer will be responsible for the implementation of the School’s new website, ensuring that stakeholder needs are addressed and deadlines are met The Chief Marketing and Communications Officer will have direct responsibility for marketing and communications for the MBA, Executive MBA and Executive Education programs and will oversee communications for other School centers, programs and departments  Measure the performance of all activities and campaigns, assess them against goals, and adjust tactics when necessary to achieve goals  Provide ongoing reporting, develop actionable insights, test and learn new approaches  Conduct thorough analyses on marketing ROI and successfully develop cost effective solutions The Chief Marketing and Communications Officer will develop strategy to increase visibility and frequency of positive media interaction of faculty, the dean and alumni of the School  Promote the School’s reputation and influence among peer business schools, prospective students and their families, alumni, and the mediaThe Chief Marketing and Communications Officer will work with the Vice Dean for External Relations and Development and the Executive Director of Alumni and Corporate Relations to provide guidance on and support for communications to alumni and corporate partners The Chief Marketing and Communications Officer will manage the dean’s communications internally and externally  S/he must capture the dean’s voice in communications internally and externally for events including commencements, orientations, welcome and update emails to faculty, 
staff, students, alumni, and Board members The Chief Marketing and Communications Officer will work with the Executive Director of Public Relations and oversee tactical aspects of the School’s PR program, including third-party rankings, collaborations with business media, outreach metrics and branded merchandise The Chief Marketing and Communications Officer will advise the dean’s Senior Staff in matters of internal and crisis communications with students, faculty and staff and be the liaison with the University on a coordinated messageS/he will evaluate and prioritize the print and publication program of the School as well as hold an advisory role with the CBS Publishing imprint Minimum QualificationsBachelor’s degree required  Advanced degree strongly preferred  Minimum of 8–10 years’ experience in progressively responsible roles in marketing and/or communications required, including at least four years in marketing Must have demonstrated excellent written, oral, interpersonal communication, organizational and follow-through skills  Writing and editing experience necessary, as well as managerial experience in supervising projects and/or directing the activities of communications and marketing staffs  Must have the ability to lead and work in a high volume, high demand, and highly collaborative, supportive, team oriented office  Must be organized, creative, enterprising, persuasive, and tactful  Must demonstrate flexibility and the ability to perform well under stress  Must possess understanding of the job at the strategic as well as tactical levels  Must be able to work well with people at all levels of the organization  Some evening and weekend hours required Equal Opportunity Employer / Disability / VeteranColumbia University is committed to the hiring of qualified local residents Job Type: Full-timePay: $200,000 00 - $230,000 00 per yearBenefits:401(k)401(k) matchingDental insuranceDisability insuranceHealth insurancePaid time offTuition reimbursementVision 
insuranceSchedule:8 hour shiftEducation:Master's (Required)Experience:marketing and/or communications: 8 years (Required)Work Remotely:Temporarily due to COVID-19"

$ux_designer
[1] "Senior UX Architect, New York\nYour Team responsibilities\nWe are searching for an experienced UX professional to join the global team developing MSCI’s next generation financial platforms  Our teams are based in US, Europe and Asia  This position, located in New York, is part of the MSCI UX team and will report to the UX Lead in New York \nYou will be responsible for building strong relationships across Design, Product, Engineering, Marketing, and Research, and create an engaging, seamless, effective user experience  In this role, we expect an analytical and creative UX professional who is able to grasp user needs and solve problems based on Lean UX Methodology \nWhat we will offer you: Depending on your location of your role, you can expect …\nCompetitive fixed and variable compensation, holiday/vacation allowance & retirement savings plans/pensions\nEmployee Resource Groups to support you in and out of the office\nA wide range of benefits including – healthcare, dental plans, risk insurances and (location dependent) - cycletowork schemes, gym benefits, retail discounts\nA purposeful approach to Wellbeing including training, support networks, membership to wellness platforms and vendors, and active local office communities\nA specific and deliberate planning to the physical offices in which we work, and support for everyone spending periods of time working remotely or at home  This approach mirrors our commitment to transparency and sustainability and puts the safety and wellness of our employees at the center of all we do  We aim to provide productive and sustainable work environments and technology that encourages collaboration, creativity and innovation \nYour key responsibilities\nUnderstand product and business requirements for both financial analytics functionality and the platform technology toolkit\nCreate user-centered design, wireframes, user flows, visual design comps and prototype using Sketch and InVision\nBuilding, iterating on and maintaining 
a design language system across our core products\nPresentation to internal and external users to conduct concept validation, usability testing and gather feedback\nSupport product and development teams to maintain design standards in the development process\nConduct design QA for implemented screens\nYour skills and experience that will help you excel\n7+ years of professional experience with Interaction Design for software, web applications\nPortfolio of UX-focused work samples for web and mobile applications\nBachelor’s Degree in Design or a related field\nAnalytical, detail-oriented, and problem-solving aptitude\nStrong work ethic and motivation to constantly improve the product and user experience\nInterest and ability to learn relevant financial analytics and technical subject matter\nStrong teamwork and communication skills – we actively collaborate with everyone\nProficiency in Sketch, Adobe CC, InVision and associated design tools\nDesign language, style guide, building and maintaining cohesive design system\nCan balance competing priorities, including short vs long term value and business vs user\nFamiliarity with various UX research tools, Agile and Lean UX methodologies\nPreferred Qualifications\n3+ years of experience in financial industry\nExperience managing, leading and mentoring UI, UX designers\nExcellent communication, presentation, interpersonal skills\nWork related to financial data and analytics products, trading systems, or similar\nUX work on highly technical or complex products in any field\nData visualization design experience\nExperience with HTML/CSS; JavaScript\nAccessibility\nDegree in Psychology, Social Science, Computer Science, Human Computer\nInteraction or a related field\n\n\nHow we’ll support you\nCoaching and support from experts in your team\nA performance and growth-oriented culture and values\nOpportunities for continuous learning to aid progression\nGoal based objectives and development plans\nTransparent performance-based 
compensation schemes\nEmployee resource groups such as the Women’s Leadership Forum, MSCIPRIDE, and Eco-Groups \nAbout MSCI and our teams:\nMSCI is a market leader in Global Indexes, Smart Beta, ESG and Risk Management, and is at the forefront of the secular trends dominating the financial services landscape today  We are committed to the future sustainability and transparency of the financial markets  We create innovative products and services that allow our clients to make more informed investment decisions, and we provide investors with critical performance measurement and risk management data and analytics  Click here to see what we do (link to MSCI brochure)\nOur values define the working environment we strive to create  We are inclusive, we champion bold ideas, we always pursue excellence, and always act with integrity  Personal accountability and responsibility are key to success, and we always work as a team to remain client centric \nMSCI is committed to developing a culture and workforce that reflects the clients and communities in which we operate  Increasing our diversity expands our talent pool which helps to accelerate innovation in all we do  We especially encourage members of historically underrepresented groups to apply, including women, ethnic minorities and those in the LGBTQ community \nTo all recruitment agencies: MSCI does not accept unsolicited CVs/Resumes  Please do not forward CVs/Resumes to any MSCI employee, location or website  MSCI is not responsible for any fees related to unsolicited CVs/Resumes \nMSCI Inc  is an equal opportunity employer committed to diversifying its workforce  It is the policy of the Firm to ensure equal employment opportunity without discrimination or harassment on the basis of race, color, religion, creed, age, sex, gender, gender identity, sexual orientation, national origin, citizenship, disability, marital and civil partnership/union status, pregnancy (including unlawful discrimination on the basis of a 
legally protected pregnancy/maternity leave), veteran status, or any other characteristic protected by law "

$recruiter
[1] "Great News! The Nation’s Top Physical Therapy Practice (that’s us!) is looking for lifetime members to join our unconventionally passionate family \n\nAbout the Company:\nSPEAR Physical Therapy is the APTA's- private practice section- National Practice of the Year! SPEAR Physical Therapy is one of the fastest-growing and most progressive physical therapy company in New York City  We consistently receive 5-star Yelp! reviews for our customer service, due in large part to our core team of talented and dedicated staff members  Our company values (or our SPEAR-IT Standards) are Service, Passion, Empathy, Accountability, Respect, Integrity, and Teamwork  The people we take on board live and breathe our values every day, which helps SPEAR impact the lives of over 150,000 New Yorkers a year  We have recently been named the “Nations Top Physical Therapy Company” by the Private Practice Section of the American Physical Therapy Association  This award has never been won by another New York company and is awarded to less than  01% of all physical therapy companies in the nation  SPEAR has also been awarded the “Leadership Development Award” by the New York Physical Therapy Association for a company that exemplifies the spirit, creation and implementation of leadership development  Also, SPEAR was selected to ring the bell at the New York Stock Exchange during National Small Business week \nSPEAR is also the only NYC-based privately-owned physical therapy practice accredited by New York State Department of Education to provide CEU credits to its therapists for its own proprietary on-site management and clinical training meetings (which is the fastest and most effective way to fulfill your CEU obligations)  And, its costs are fully covered by SPEAR!\nTalent Acquisition Specialist\n\nOur Shared Values - As a SPEAR team member, you naturally exhibit SPEAR’s values by default  Your talents instinctively result in actions of Service, Passion, Empathy, Accountability, Respect, 
Impact and Teamwork  You put people first, and know that each teammate, and each New Yorker, can rely on you to help them become the best versions of themselves \n\nYour Impact - The Talent Acquisition Specialist is responsible for the complete recruitment cycle and meeting human capital demands of SPEAR as the organization continues to grow  As the primary point of contact for all candidates, hiring managers and external vendors, you will ensure that SPEAR can efficiently staff all open roles  You have the initiative to proactively solve problems before they arise and support decisions through data reporting  The Talent Acquisition Specialist is also responsible for supporting the HR Department with onboarding, new hire orientation, system maintenance, compliance, performance management and system related items on an as needed basis \n\nOrganization Structure:\nReports to: Director, HR\nPrimary interactions: All SPEAR Employees (Including Directors) and Candidates\n\nSPEAR-IT Values:Respect & Teamwork: You approach every interaction from a place of respect and teamwork  You are committed to serving as a resource for directors and candidates  You work closely with managers and candidates so that they understand the recruitment process and the systems available to them \nImpact & Passion: You are a champion of SPEAR’s Human Resources Information System - Ceridian Dayforce and you provide reports to show the impact of Recruitment and Onboarding on the organization Empathy & Service: You provide 5-star customer service to all SPEAR employees  You respond efficiently and accordingly because you understand their feelings and needs Accountability: You take pride in meeting all recruitment deadlines and understand the impact this has on the candidate and Hiring Managers’ experience \n\nSuccess Factors:\nRecruitment:\no Respond to all inquiries and requests within 8 businesses hours \no Posts jobs externally within 24 hours of receiving notice of opening from Hiring 
Manager\no Connects viable candidates with Directors within 24 hours of screening\no Train all new managers on recruitment procedures and systems within 1 week of start\no Ensures strong vendor relationships with Hireright, Ceridian, Indeed, Linked In, Medbridge etc  so that any system or other issues are addressed within 24 hours \no Fosters new relationships and maintains existing relationships with career services staff at universities with DPT programs \nOnboarding:\no Issue offer letter & initiate background screening within 24 hours of a hiring decision\no Send new hires & directors Employee’s onboarding resources at least 2 business days before start date\no Assign all new hires MedBridge Compliance learning track and other role specific training 2 business days prior to employee’s start date\no Run monthly reporting to track completion of compliance training\nHR Systems\no Responds to all HR systems issues within 2 hours of reporting inclusive of\nCeridian Talent modules (Recruitment, Onboarding and Performance Management) Hireright, Medbridge, SPEAR Intranet, Indeed, Linked In,\no Complete system updates to align with department or company initiatives\n\nPosition Summary:\nRecruitment\no Monitors applications for open roles daily, identifies candidates for first round interviews\no Completes detailed documentation of a candidate’s progression through the interview process within 24 hours of an interview\no Maintains average time to fill of 30 days for all roles\no Complies and distributes weekly & monthly recruitment reports \no Ensures strong SPEAR presence at campus career fairs by working with hiring managers to solicit participation, coordinate logistics and communicate details\no Monitors and maintains Ceridian Dayforce for the Recruitment module\no Support the planning and execution of the Classroom to Clinician event twice annually\nOnboarding\no Measure Onboarding & Off boarding activities for all teams to ensure they are completed prior to New 
Hire start date and by Employee’s last day; disable SPEAR Intranet and Compliance training accounts\no Troubleshoots any onboarding issues and answers questions from new hires regarding offer letter, systems, and onboarding resources\no Communicates onboarding changes to all departments involved in the process\no Maintains and updates onboarding forms\nHR Systems\no Responds to all system inquiries within 4 hours; schedule training for employees within 5 business days of request\no Assigns employees to Mid-Year and Annual Reviews cycles and monitor completion of reviews; offer support as needed\nWe thank all applicants in advance for their interest in this position \nSPEAR Physical Therapy is an Equal Opportunity Employer "
hope
Error: object 'hope' not found
max(master_li$human_resource_specialist)
[1] 0.1767956
#split dataframe into new dataframes based on job description
library(dplyr)
# Group the job table by job title so it can be split per job below.
master_df <- group_by(jobs_df, Job)

group_split(master_df)
<list_of<
  tbl_df<
    Job        : character
    State      : character
    Employment : character
    Description: character
    Year       : double
  >
>[16]>
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

[[7]]

[[8]]

[[9]]

[[10]]

[[11]]

[[12]]

[[13]]

[[14]]

[[15]]

[[16]]
group_keys(master_df)

#stopwords::stopwords("en", source = "snowball")

NEW METHODS:

Breaking the jobs down into STEM and non-STEM groups, then comparing the two groups.

# Raw token totals per job type. No normalization factor is applied here:
# each entry is simply the column sums of the job's token counts.
master_li_nonfactored <- list()
for (i in seq_along(split_df)) {
  job_df <- split_df[[i]]

  # Token counts start at column 6; the first five columns are Job, State,
  # Employment, Description and Year. drop = FALSE keeps a data frame even
  # when there is only a single token column.
  token_counts <- job_df[, 6:ncol(job_df), drop = FALSE]

  master_li_nonfactored[[names(split_df)[i]]] <- colSums(token_counts)
}


# Job categories treated as STEM
stem_jobs <- c("ux_designer", "test_engineer", "site_reliability_engineer", "data_architect", "data_scientist", "software_developer", "statistician", "deep_learning", "machine_learning_engineer", "business_analyst")

# Job categories treated as non-STEM
non_stem_jobs <- c("recruiter", "marketing", "sales", "office_manager", "human_resource_specialist", "researcher")

# STEM frame: one row of raw token totals per STEM job, folded onto an
# initially empty data frame in the order given by stem_jobs.
stem_df <- Reduce(bind_rows, master_li_nonfactored[stem_jobs], data.frame())
# A token a job never uses comes through as NA; treat it as a count of 0.
stem_df[is.na(stem_df)] <- 0
stem_df

# Non-STEM frame, built the same way.
non_stem_df <- Reduce(bind_rows, master_li_nonfactored[non_stem_jobs], data.frame())
non_stem_df[is.na(non_stem_df)] <- 0
non_stem_df

# Token vocabularies of the two groups
stem_tokens <- names(stem_df)
non_stem_tokens <- names(non_stem_df)

# Tokens that occur in one group only
unique_stem_tokens <- setdiff(stem_tokens, non_stem_tokens)
unique_non_stem_tokens <- setdiff(non_stem_tokens, stem_tokens)

This part checks whether the resume matches the STEM or the non-STEM vocabulary more closely.

# Keep only the token columns that are exclusive to each group.
stem_df <- stem_df[, unique_stem_tokens, drop = FALSE]
non_stem_df <- non_stem_df[, unique_non_stem_tokens, drop = FALSE]

#   
#total_stem = sort(apply(stem_df, 2, sum), decreasing = T)
#total_non_stem = sort(apply(non_stem_df, 2, sum), decreasing = T)


# Count how many of a resume's tokens belong to the STEM and non-STEM sets.
#
# resume:   named object (e.g. a data frame) whose names are the resume tokens
# stem:     named object whose names are the STEM tokens
# non_stem: named object whose names are the non-STEM tokens
#
# Returns list(stem = <count>, non_stem = <count>). A token present in both
# sets is counted as STEM only. An unnamed or empty resume gives 0 for both.
#
# Fix: the non-STEM counter used to be set to count_stem + 1 on every hit, so
# it reflected the STEM tally rather than the number of non-STEM matches.
stem_or_non <- function(resume, stem, non_stem) {
  resume_tokens <- names(resume)

  in_stem <- resume_tokens %in% names(stem)
  # Only tokens that were not already counted as STEM can count as non-STEM.
  in_non_stem <- !in_stem & resume_tokens %in% names(non_stem)

  list("stem" = sum(in_stem), "non_stem" = sum(in_non_stem))
}



# STEM vs. non-STEM token counts for the resume held in `zac`.
values <- stem_or_non(resume = zac, stem = stem_df, non_stem = non_stem_df)
# Fit a glmnet model with alpha = 1 at the single lambda lasso_cv$lambda.1se.
# NOTE(review): this call requires `lasso_cv` to exist already; the error
# recorded directly after this line shows it did not when the notebook last
# ran -- confirm the step that creates lasso_cv succeeds before this line.
lasso_mod <- glmnet(x_train, y_train, alpha=1, lambda=lasso_cv$lambda.1se)
Error in glmnet(x_train, y_train, alpha = 1, lambda = lasso_cv$lambda.1se) : 
  object 'lasso_cv' not found
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KbGlicmFyeShyanNvbikKbGlicmFyeShkcGx5cikKbGlicmFyeSh0b2tlbml6ZXJzKQpsaWJyYXJ5KHN0b3B3b3JkcykKbGlicmFyeShzdHJpbmdyKQoKam9ic18yMDIwID0gZnJvbUpTT04oZmlsZSA9ICJpbmRlZWRfam9iX2Rlc2NzXzIwMjBfMDlfMjAuanNvbiIpCmpvYnNfMjAyMSA9IGZyb21KU09OKGZpbGUgPSAiaW5kZWVkX2pvYl9kZXNjc18yMDIxXzAxXzI1Lmpzb24iKQpgYGAKCgpHZXR0aW5nIHRoZSBkYXRhIGludG8gYSB3b3JrYWJsZSBERgpgYGB7cn0Kam9ic19kZiA9IGRhdGEuZnJhbWUoIkpvYiIsICJTdGF0ZSIsICJFbXBsb3ltZW50IiwgCiAgICAgICAgICAgICAgICAgICAgICJkZXNjcmlwdGlvbiIsIDIwMjApCgpuYW1lcyhqb2JzX2RmKSA9IGMoIkpvYiIsIlN0YXRlIiwiRW1wbG95bWVudCIsICJEZXNjcmlwdGlvbiIsICJZZWFyIikKCmZvcihpIGluIDE6bGVuZ3RoKGpvYnNfMjAyMCkpIHsKICBqb2IgPSBqb2JzXzIwMjBbW2ldXSRyZXF1ZXN0X3BhcmFtc1sxXQogIHN0YXRlID0gam9ic18yMDIwW1tpXV0kcmVxdWVzdF9wYXJhbXNbMl0KICBlbXBsb3ltZW50ID0gam9ic18yMDIwW1tpXV0kcmVxdWVzdF9wYXJhbXNbM10KICB5ZWFyID0gMjAyMAogIAogICNwcmludChpKQogIAogIGZvcihqIGluIDE6bGVuZ3RoKGpvYnNfMjAyMFtbaV1dJGpvYl9kZXNjcmlwdGlvbnMpKSB7CiAgICBkZXNjcmlwdCA9IGpvYnNfMjAyMFtbaV1dJGpvYl9kZXNjcmlwdGlvbnNbal0KICAgIHRlbXBfZGYgPSBhcy5kYXRhLmZyYW1lKGMoam9iLHN0YXRlLGVtcGxveW1lbnQsIGRlc2NyaXB0LCB5ZWFyKSkKICAgIG5hbWVzKHRlbXBfZGYpID0gYygiSm9iIiwiU3RhdGUiLCJFbXBsb3ltZW50IiwgIkRlc2NyaXB0aW9uIiwgIlllYXIiKQogICAgam9ic19kZiA9IHJiaW5kKGpvYnNfZGYsIHRlbXBfZGYpCiAgICAjcHJpbnQoaikKICB9Cn0KCgpoZWFkKGpvYnNfZGYpCmRpbShqb2JzX2RmKQoKI3VuaXF1ZShmYWN0b3Ioam9ic19kZiRTdGF0ZSkpCmBgYApUaGVyZSBpcyBubyBkYXRhIGZvciBhbnl0aGluZyBwYXN0IHRoZSA0dGggZGVzY3JpcHRpb24KYGBge3J9CiMxOjQgYmVjYXVzZSBldmVyeSBqb2IgYWZ0ZXIgdGhlIDR0aCBvbmUgaGFzIG5vIGRlc2NyaXB0aW9uCmZvcihpIGluIDE6NCkgewogIGpvYiA9IGpvYnNfMjAyMVtb
aV1dJHJlcXVlc3RfcGFyYW1zWzFdCiAgc3RhdGUgPSBqb2JzXzIwMjFbW2ldXSRyZXF1ZXN0X3BhcmFtc1syXQogIGVtcGxveW1lbnQgPSBqb2JzXzIwMjFbW2ldXSRyZXF1ZXN0X3BhcmFtc1szXQogIHllYXIgPSAyMDIxCiAgCiAgcHJpbnQoaSkKICAKICBmb3IoaiBpbiAxOmxlbmd0aChqb2JzXzIwMjFbW2ldXSRqb2JfZGVzY3JpcHRpb25zKSkgewogICAgZGVzY3JpcHQgPSBqb2JzXzIwMjFbW2ldXSRqb2JfZGVzY3JpcHRpb25zW2pdCiAgICB0ZW1wX2RmID0gYXMuZGF0YS5mcmFtZShjKGpvYixzdGF0ZSxlbXBsb3ltZW50LCBkZXNjcmlwdCwgeWVhcikpCiAgICBuYW1lcyh0ZW1wX2RmKSA9IGMoIkpvYiIsIlN0YXRlIiwiRW1wbG95bWVudCIsICJEZXNjcmlwdGlvbiIsICJZZWFyIikKICAgIGpvYnNfZGYgPSByYmluZChqb2JzX2RmLCB0ZW1wX2RmKQogICAgcHJpbnQoaikKICB9Cn0KCiNqb2JzXzIwMjEKaGVhZChqb2JzX2RmKQpkaW0oam9ic19kZikKCiNyZW1vdmUgdGhlIGZpcnN0IHJvdyBiZWNhdXNlIGl0IGlzIHVzZWxlc3MKam9ic19kZiA8LSBqb2JzX2RmWy0xLF0KCiNtYWtlIHRoZSBuYW1lcyBlYXNpZXIgZm9yIHdoZW4gd2Ugc3BsaXQKam9ic19kZiRKb2IgPSBzdHJfcmVwbGFjZV9hbGwoam9ic19kZiRKb2IsICJcXCsiLCAiXyIpCgojcmVtb3ZlIHBvdGVudGlhbCBzb3VyY2VzIG9mIHdlaXJkIHN0cmluZyBiZWhhdmlvcgojaW4gYSBjb3VwbGUgb2Ygc2FtcGxlcyBpdCB3YXMgcmVsYXRpdmVseSBjb21tb24gdG8gc2VlICJhLlQiIGFzIGluIHRoZSBlbmQgb2Ygb25lIHNlbnRlbmNlIGludG8gdGhlIG5leHQKam9ic19kZiREZXNjcmlwdGlvbiA9IHN0cl9yZXBsYWNlX2FsbChqb2JzX2RmJERlc2NyaXB0aW9uLCAiXFwuIiwgIiAiKQpgYGAKCgpgYGB7cn0KI2p1c3Qgc29tZSB2aXN1YWxpemF0aW9uIG9mIHRoZSBkYXRhIApwYXIobWFyPWMoMTEsNCw0LDQpKQpiYXJwbG90KGhlaWdodCA9IHRhYmxlKGpvYnNfZGYkSm9iKSwgbGFzID0gMiwgY2V4Lm5hbWVzID0gLjc1KQoKdGFibGUoam9ic19kZiRKb2IpCmBgYAoKWW91IHdpbGwgbmVlZCB0byBkb3dubG9hZCB0b2tlbml6ZXJzIGFuZCBzdG9wd29yZHMKYGBge3J9CiNzcGxpdHRpbmcgdGhlIERGCnNwbGl0X2RmID0gc3BsaXQoam9ic19kZiwgam9ic19kZiRKb2IpCgojdG9rZW5pemUgZnVuY3Rpb24gdGhhdCB0YWtlcyBqb2IgZGVzY3JpcHRpb25zIGFuZCB0b2tlbml6ZXMgdGhlbSBpbiBhIG5ldyBERgojVGhhdCBERiBnZXRzIG1lcmdlZCBiYWNrIHdpdGggdGhlIG9sZCBvbmUgCnRva2VuaXplLmRmID0gZnVuY3Rpb24oZGYpIHsKICAKICB0ZW1wID0gZGF0YS5mcmFtZSgpCiAgZm9yIChpIGluIDE6bGVuZ3RoKGRmJERlc2NyaXB0aW9uKSkgewogICAgdG9rZW5zID0gdGFibGUodG9rZW5pemVfd29yZHMoZGYkRGVzY3JpcHRpb25baV0sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RvcHdvcmRzID0gc3RvcHdvcmRzOjpzdG9wd29yZHMoImVu
IikpKQogICAgI3RlbXAgREYgdG8gbWVyZ2UgbGF0ZXIgCiAgICB0ZW1wID0gYmluZF9yb3dzKHRlbXAsIHRva2VucykKICAgIAogIH0KICAjc2V0dGluZyBOQSB0byAwIGJlY2F1c2UgaXQgaXMgZWFzaWVyIHRvIHVzZSBpbiB0aGlzIGNhc2UKICB0ZW1wW2lzLm5hKHRlbXApXSA9IDAKICBkZiA9IGJpbmRfY29scyhkZix0ZW1wKQogIHJldHVybihkZikKfQpgYGAKCgpgYGB7cn0KdG9rZW5pemVfbmdyYW1zLmRmID0gZnVuY3Rpb24oZGYpIHsKICAKICB0ZW1wID0gZGF0YS5mcmFtZSgpCiAgZm9yIChpIGluIDE6bGVuZ3RoKGRmJERlc2NyaXB0aW9uKSkgewogICAgdG9rZW5zID0gdGFibGUodG9rZW5pemVfbmdyYW1zKGRmJERlc2NyaXB0aW9uW2ldLCBuID0gMywgbl9taW4gPSAxLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RvcHdvcmRzID0gc3RvcHdvcmRzOjpzdG9wd29yZHMoImVuIikpKQogICAgI3RlbXAgREYgdG8gbWVyZ2UgbGF0ZXIgCiAgICB0ZW1wID0gYmluZF9yb3dzKHRlbXAsIHRva2VucykKICAgIAogIH0KICAjc2V0dGluZyBOQSB0byAwIGJlY2F1c2UgaXQgaXMgZWFzaWVyIHRvIHVzZSBpbiB0aGlzIGNhc2UKICB0ZW1wW2lzLm5hKHRlbXApXSA9IDAKICBkZiA9IGJpbmRfY29scyhkZix0ZW1wKQogIHJldHVybihkZikKfQpgYGAKCgpUaGlzIHBhcnQgdGFrZXMgc29tZSB0aW1lCmBgYHtyfQoKI2ZvciBsb29wIHRvIGdvIHRocm91Z2ggZWFjaCBqb2IgdHlwZSBhbmQgdG9rZW5pemUgdGhlIGRlc2NyaXB0aW9uCmZvciAoaSBpbiAxOmxlbmd0aChzcGxpdF9kZikpIHsKICB0ZW1wID0gc3BsaXRfZGZbaV0KICBzcGxpdF9kZltbaV1dID0gdG9rZW5pemUuZGYodGVtcFtbMV1dKQogIHByaW50KGkpCn0KCmBgYApUaGlzIHBhcnQgaXMgZ3JhYmJpbmcgdGhlIHRvdGFscyBvZiBlYWNoIGNvbHVtbiAKYGBge3J9Cm1hc3Rlcl9saSA9IGxpc3QoKQpmb3IgKGkgaW4gMTpsZW5ndGgoc3BsaXRfZGYpKSB7CiAgCiAgI2dldHRpbmcgYSB0ZW1wIGhvbGRlcgogIHRlbXAgPSBhcHBseShzcGxpdF9kZltbaV1dWyw2Omxlbmd0aChzcGxpdF9kZltbaV1dKV0sIDIsIHN1bSkKICAjZ2V0dGluZyBhIG5vcm1hbGl6YXRpb24gZmFjdG9yIAogIGZhY3RvciA9IG1lYW4oYXBwbHkoc3BsaXRfZGZbW2ldXVssNjpsZW5ndGgoc3BsaXRfZGZbW2ldXSldLCAxLCBzdW0pKQogICNnZXR0aW5nIHNvbWUgc29ydCBvZiBub3JtYWxpemVkIGRhdGEgZm9yIHRoZSBtYXN0ZXIgbGlzdAogIG1hc3Rlcl9saVtbbmFtZXMoc3BsaXRfZGYpW2ldXV0gPSB0ZW1wIC8gZmFjdG9yCiAgCn0KCm1heChtYXN0ZXJfbGkkaHVtYW5fcmVzb3VyY2Vfc3BlY2lhbGlzdCkKYGBgCgpgYGB7cn0KdG9rZW5pemVfcmVzdW1lID0gZnVuY3Rpb24ocGF0aCkgewogICNpbnB1dHRpbmcgc2FyYWggcmVzdW1lIAogIHJlc3VtZSA9IHJlYWRMaW5lcyhwYXRoLCB3YXJuID0gRikKICByZXN1bWUgPSBzdHJfYyhyZXN1bWUsIHNlcCA9ICIgIiwgY29sbGFwc2Ug
PSAiICIpIAogIAogICN0b2tlbml6aW5nIHRoZSByZXN1bWUgCiAgdG9rZW5zID0gdGFibGUodG9rZW5pemVfd29yZHMocmVzdW1lLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RvcHdvcmRzID0gc3RvcHdvcmRzOjpzdG9wd29yZHMoImVuIikpKQogIAogIHRva2VucyA9IGFzLmxpc3QodG9rZW5zLCBhbGwubmFtZXMgPSBGKQogIAogIHRva2VucyA9IGFzLmRhdGEuZnJhbWUodG9rZW5zKQogIHJldHVybih0b2tlbnMpCn0KemFjID0gdG9rZW5pemVfcmVzdW1lKCJ0ZXN0LnR4dCIpCnphYwpgYGAKCmBgYHtyfQojZnVuY3Rpb24gdGhhdCBtdWx0aXBseXMgY29sdW1ucyBpbiBERnMgdG9nZXRoZXIgCm11bHRpID0gZnVuY3Rpb24oZGYpIHsKICBpZiAobGVuZ3RoKGRmKSA8IDMpIHsKICAgIHJldHVybihkZlsxXSAqIGRmWzJdKQogIH0gZWxzZSB7CiAgICB0aW1lcyA9IGRmWzFdICogZGZbMl0KICAgIGZvcihpIGluIDM6bGVuZ3RoKGRmKSkgewogICAgICB0aW1lcyA9IGRmW2ldICogdGltZXMgIAogICAgfQogICAgcmV0dXJuKHRpbWVzKQogIH0KfQoKI2Z1bmN0aW9uIHRoYXQgZ2l2ZXMgYmVzdCBtYXRjaGVzIHRvIHRoZSBvdmVyYWxsIGpvYiBmaWVsZHMgCiNuYWl2ZS9zdHVwaWQgd2F5IG9mIGRvaW5nIGl0IApiZXN0X21hdGNoZXNfb3ZlcnZpZXcgPSBmdW5jdGlvbihsaSwgcmVzdW1lKSB7CiAgbWF0Y2hlcyA9IG1hdHJpeChOQSwgbnJvdyA9IGxlbmd0aChsaSksIG5jb2wgPSAxKQogIAogIGZvcihpIGluIDE6bGVuZ3RoKGxpKSkgewogICAgdGVzdGVyID0gYmluZF9yb3dzKGxpW1tpXV0sIHJlc3VtZSkKICAgIHRlc3Rlcltpcy5uYSh0ZXN0ZXIpXSA9IDAKICAgIHNjb3JlID0gc3VtKGFwcGx5KHRlc3RlciwgMiwgbXVsdGkpLCBuYS5ybSA9IFQpCiAgICAjbWF0Y2hlc1tbbmFtZXMobGkpW2ldXV0gPSBzY29yZQogICAgbWF0Y2hlc1tpLDFdID0gc2NvcmUKICB9CiAgcm93bmFtZXMobWF0Y2hlcykgPSBuYW1lcyhsaSkKICByZXR1cm4obWF0Y2hlcykKfQoKc2FyYWggPSB0b2tlbml6ZV9yZXN1bWUoInNhcmFocmVzdW1ldHh0LnR4dCIpCnNhcmFoCgp6YWMgPSB0b2tlbml6ZV9yZXN1bWUoInRlc3QudHh0IikKemFjCgpibSA9IGJlc3RfbWF0Y2hlc19vdmVydmlldyhtYXN0ZXJfbGksIHNhcmFoKQpwcmludChzb3J0KGJtWywxXSwgZGVjcmVhc2luZyA9IFQpKQoKI2NvbnZlcnRpbmcgam9iIG5hbWVzIHRvIGEgbWF0cml4CmpvYl9uYW1lcyAgPSBuYW1lcyhzb3J0KGJtWywxXSwgZGVjcmVhc2luZyA9IFQpWzE6M10pCmpvYl9uYW1lcyA9IHN0cl9zcGxpdF9maXhlZChqb2JfbmFtZXMsICIiLCAxKQoKYGBgClNpbmNlIGl0IGlzIGEgaHVtYW4gaW4gdGhlIG1vZGVsIGFsZ28gd2UgY2FuIHRoZW4gZWl0aGVyIGRldGVybWluZSBpZiB0aGVzZSBzZWVtIGxpa2UgdGhleSB3aWxsIGZpdCBvciBub3QuCldlIGNvdWxkIGp1c3QgZ3JhYiB0aGUgZmlyc3QgMyBhbmQgZ2V0IGEgZmV3IG1vcmUgZXhhbXBsZXMgCgpg
YGB7cn0KYmVzdF9tYXRjaGVzID0gZnVuY3Rpb24oZGYsIGpvYl9uYW1lLCByZXN1bWUpIHsKICBtYXRjaGVzID0gbGlzdCgpCiAgI291dGVybG9vcCBmb3IgdGhlIG92ZXJhbGwgam9iIGNhdGVnb3J5CiAgZm9yIChpIGluIDE6bnJvdyhqb2JfbmFtZSkpIHsKICAgICNpbml0aWFsaXplIGEgc2NvcmUgYW5kIGFuIGluZGV4IHRvIHJldHVybiBsYXRlciBvbiAKICAgIHNjb3JlID0gMAogICAgaW5kZXggPSAxCiAgICBwcmludChqb2JfbmFtZVtpLDFdKQogICAgI2lubmVybG9vcCB0byBnZXQgdGhlIGJlc3Qgam9iIG1hdGNoIGZyb20gdGhlIGNhdGVnb3J5CiAgICBmb3IoaiBpbiAxOm5yb3coZGZbW2ldXSkpIHsKICAgICAgI3RlbXAgaXMgYSBERiBvZiBqdXN0IHRoZSBqdGggcm93IG9ubHkgY29udGFpbmluZyBjb2x1bW5zIHdpdGggdG9rZW5pemF0aW9uCiAgICAgIHRlbXAgPSBkZltbam9iX25hbWVbaSwxXSBdXVtqLCA2Om5jb2woZGZbW2pvYl9uYW1lW2ksMV1dXSldCiAgICAgIHRlc3RlciA9IGJpbmRfcm93cyh0ZW1wLCByZXN1bWUpCiAgICAgIHRlc3Rlcltpcy5uYSh0ZXN0ZXIpXSA9IDAKICAgICAgbmV3X3Njb3JlID0gc3VtKGFwcGx5KHRlc3RlciwgMiwgbXVsdGkpLCBuYS5ybSA9IFQpCiAgICAgICNjaGFuZ2VzIHRoZSBiZXN0IHNlZW4gc2NvcmUgCiAgICAgIGlmIChuZXdfc2NvcmUgPiBzY29yZSkgewogICAgICAgIGluZGV4ID0gaiAKICAgICAgICBzY29yZSA9IG5ld19zY29yZSAKICAgICAgfQogICAgICAKICAgIH0KICAgICNzZXRzIHRoZSBsaXN0IHdpdGggbmFtZSBvZiBvdmVyYWxsIGpvYiBhbmQgYXBwZW5kcyB0aGUgYmVzdCBzY29yaW5nIGRlc2NyaXB0aW9uIAogICAgbWF0Y2hlc1tbam9iX25hbWVbaSwxXV1dID0gZGZbW2pvYl9uYW1lW2ksMV1dXVtpbmRleCwgNF0KICAgIH0KICByZXR1cm4obWF0Y2hlcykKfQoKCmJlc3QgPSBiZXN0X21hdGNoZXMoc3BsaXRfZGYsIGpvYl9uYW1lcywgc2FyYWgpCmJlc3QKYGBgCmBgYHtyfQptYXgobWFzdGVyX2xpJGh1bWFuX3Jlc291cmNlX3NwZWNpYWxpc3QpCmBgYAoKIApgYGB7cn0KI3NwbGl0IGRhdGFmcmFtZSBpbnRvIG5ldyBkYXRhZnJhbWVzIGJhc2VkIG9uIGpvYiBkZXNjcmlwdGlvbgpsaWJyYXJ5KGRwbHlyKQptYXN0ZXJfZGYgPC0gam9ic19kZiAlPiUKICBncm91cF9ieShKb2IpCgpncm91cF9zcGxpdChtYXN0ZXJfZGYpCmdyb3VwX2tleXMobWFzdGVyX2RmKQoKI3N0b3B3b3Jkczo6c3RvcHdvcmRzKCJlbiIsIHNvdXJjZSA9ICJzbm93YmFsbCIpCgpgYGAKCiMjIE5FVyBNRVRIT0RTOgojIyMgYnJlYWFraW5nIGpvYnMgZG93biB0byBzdGVtIGFuZCBub24tc3RlbSB0aGVuIGNvbXBhcmluZyBqb2JzCgpgYGB7cn0KbWFzdGVyX2xpX25vbmZhY3RvcmVkID0gbGlzdCgpCmZvciAoaSBpbiAxOmxlbmd0aChzcGxpdF9kZikpIHsKICAKICAjZ2V0dGluZyBhIHRlbXAgaG9sZGVyCiAgdGVtcCA9IGFwcGx5KHNwbGl0X2RmW1tpXV1bLDY6bGVuZ3RoKHNwbGl0X2Rm
W1tpXV0pXSwgMiwgc3VtKQogICNnZXR0aW5nIGEgbm9ybWFsaXphdGlvbiBmYWN0b3IgCiAgI2ZhY3RvciA9IG1lYW4oYXBwbHkoc3BsaXRfZGZbW2ldXVssNjpsZW5ndGgoc3BsaXRfZGZbW2ldXSldLCAxLCBzdW0pKQogICNnZXR0aW5nIHNvbWUgc29ydCBvZiBub3JtYWxpemVkIGRhdGEgZm9yIHRoZSBtYXN0ZXIgbGlzdAogIG1hc3Rlcl9saV9ub25mYWN0b3JlZFtbbmFtZXMoc3BsaXRfZGYpW2ldXV0gPSB0ZW1wICMvIGZhY3RvcgogIAp9CgoKI3N0ZW0gam9icyBmb3IgYSBuZXcgREYKc3RlbV9qb2JzIDwtIGMoInV4X2Rlc2lnbmVyIiwgInRlc3RfZW5naW5lZXIiLCAic2l0ZV9yZWxpYWJpbGl0eV9lbmdpbmVlciIsICJkYXRhX2FyY2hpdGVjdCIsICJkYXRhX3NjaWVudGlzdCIsICJzb2Z0d2FyZV9kZXZlbG9wZXIiLCAic3RhdGlzdGljaWFuIiwgImRlZXBfbGVhcm5pbmciLCAibWFjaGluZV9sZWFybmluZ19lbmdpbmVlciIsICJidXNpbmVzc19hbmFseXN0IikKCiNub24tc3RlbSBqb2JzIGZvciBhIG5ldyBERgpub25fc3RlbV9qb2JzIDwtIGMoInJlY3J1aXRlciIsICJtYXJrZXRpbmciLCAic2FsZXMiLCAib2ZmaWNlX21hbmFnZXIiLCAiaHVtYW5fcmVzb3VyY2Vfc3BlY2lhbGlzdCIsICJyZXNlYXJjaGVyIikKCiNpbml0aWFsaXppbmcgc3RlbSBERgpzdGVtX2RmIDwtIGRhdGEuZnJhbWUoKQoKI2NyZWF0aW5nIG5ldyBzdGVtIERGIApmb3IgKHRpdGxlIGluIHN0ZW1fam9icykgewogIHN0ZW1fZGYgPC0gYmluZF9yb3dzKHN0ZW1fZGYsIG1hc3Rlcl9saV9ub25mYWN0b3JlZFtbdGl0bGVdXSkKICAKfQojcmVtb3ZpbmcgTkEgYW5kIHJlcGxhY2luZyB3aXRoIDAKc3RlbV9kZltpcy5uYShzdGVtX2RmKV0gPSAwCnN0ZW1fZGYKCiNpbml0aWFsaXppbmcgCm5vbl9zdGVtX2RmIDwtIGRhdGEuZnJhbWUoKQoKI2NyZWF0aW5nIG5vbiBzdGVtIERGCmZvciAodGl0bGUgaW4gbm9uX3N0ZW1fam9icykgewogIG5vbl9zdGVtX2RmIDwtIGJpbmRfcm93cyhub25fc3RlbV9kZiwgbWFzdGVyX2xpX25vbmZhY3RvcmVkW1t0aXRsZV1dKQogIAp9CiNhZGRpbmcgMCBmb3IgTkEKbm9uX3N0ZW1fZGZbaXMubmEobm9uX3N0ZW1fZGYpXSA9IDAKbm9uX3N0ZW1fZGYKYGBgCmBgYHtyfQpzdGVtX3Rva2VucyA8LSBjb2xuYW1lcyhzdGVtX2RmKQpub25fc3RlbV90b2tlbnMgPC0gY29sbmFtZXMobm9uX3N0ZW1fZGYpCgojZ2V0dGluZyB1bmlxdWUgc2V0IG9mIHRva2VucyAKdW5pcXVlX3N0ZW1fdG9rZW5zIDwtIHNldGRpZmYoc3RlbV90b2tlbnMsIG5vbl9zdGVtX3Rva2VucykKdW5pcXVlX25vbl9zdGVtX3Rva2VucyA8LSBzZXRkaWZmKG5vbl9zdGVtX3Rva2Vucywgc3RlbV90b2tlbnMpCmBgYAoKCiMjIHBhcnQgZm9yIHNlZWluZyBpZiB0aGUgcmVzdW1lIG1hdGNoZXMgc3RlbSBvciBub24gc3RlbSBtb3JlIAoKYGBge3J9CnN0ZW1fZGYgPSBzdGVtX2RmW3VuaXF1ZV9zdGVtX3Rva2Vuc10Kbm9uX3N0ZW1fZGYgPSBub25fc3RlbV9kZlt1
bmlxdWVfbm9uX3N0ZW1fdG9rZW5zXQoKIyAgIAojdG90YWxfc3RlbSA9IHNvcnQoYXBwbHkoc3RlbV9kZiwgMiwgc3VtKSwgZGVjcmVhc2luZyA9IFQpCiN0b3RhbF9ub25fc3RlbSA9IHNvcnQoYXBwbHkobm9uX3N0ZW1fZGYsIDIsIHN1bSksIGRlY3JlYXNpbmcgPSBUKQoKCiNzZWVpbmcgaWYgYSByZXN1bWUgaXMgc3RlbSBvciBub24gc3RlbSAKc3RlbV9vcl9ub24gPSBmdW5jdGlvbihyZXN1bWUsIHN0ZW0sIG5vbl9zdGVtKXsKICBjb3VudF9zdGVtID0gMCAKICBjb3VudF9ub25fc3RlbSA9IDAKICBmb3IgKGkgaW4gbmFtZXMocmVzdW1lKSkgewogICAgaWYgKGkgJWluJSBuYW1lcyhzdGVtKSkgewogICAgICBjb3VudF9zdGVtID0gY291bnRfc3RlbSArIDEgCiAgICB9CiAgICBlbHNlIGlmIChpICVpbiUgbmFtZXMobm9uX3N0ZW0pKSB7CiAgICAgIGNvdW50X25vbl9zdGVtID0gY291bnRfc3RlbSArIDEKICAgIH0KICB9CiAgbGkgPSBsaXN0KCJzdGVtIiA9IGNvdW50X3N0ZW0sICJub25fc3RlbSIgPSBjb3VudF9ub25fc3RlbSkKICByZXR1cm4obGkpCn0KCgoKdmFsdWVzID0gc3RlbV9vcl9ub24oemFjLCBzdGVtX2RmLCBub25fc3RlbV9kZikKYGBgCgoKCmBgYHtyfQoKI2NyZWF0ZSBhIHZhciB0byByZXByZXNlbnQgd2VhdGhlciB0aGUgcmVzdW1lIGlzIHN0ZW0gb3Igbm90CmlmICh2YWx1ZXMkc3RlbSA8IHZhbHVlcyRub25fc3RlbSkgewogIHJlc3VtZV9pc19zdGVtID0gRkFMU0UKfSBlbHNlIHsKICByZXN1bWVfaXNfc3RlbSA9IFQKfQoKCiNuZXcgZGYgdG8gaG9sZCB0aGUgcmFuZG9tbHkgc2FtcGxlZCBqb2IgZGVzY3JpcHRpb25zCiN0aGVzZSBqb2IgZGVzY3Mgd2lsbCBiZSB1c2VkIHRvIG1pbmUgcGFyYW1ldGVycyB0byBmaXQgYSBwcmVkaWN0aXZlIG1vZGVsIHRoYXQgZ2l2ZXMgd2hhdCB0aGUgdXNlciB3aWxsIGxpa2UKc2FtcGxlX2pvYl9kZXNjcmlwdGlvbnMgPC0gZGF0YS5mcmFtZSgpCgojbm9uIHN0ZW0gZGYKbm9uX3N0ZW1fam9ic19kZiA8LSBqb2JzX2RmW2pvYnNfZGYkSm9iPT0icmVjcnVpdGVyInxqb2JzX2RmJEpvYj09Im1hcmtldGluZyJ8am9ic19kZiRKb2I9PSJzYWxlcyJ8ICAgICAgICAgICAgICAgICAgICAgIGpvYnNfZGYkSm9iPT0ib2ZmaWNlX21hbmFnZXIifGpvYnNfZGYkSm9iPT0iaHVtYW5fcmVzb3VyY2Vfc3BlY2lhbGlzdCIgfGpvYnNfZGYkSm9iPT0icmVzZWFyY2hlciIgLF0KCiNzdGVtIGRmCnN0ZW1fam9ic19kZiA8LSBqb2JzX2RmW2pvYnNfZGYkSm9iID09ICJ1eF9kZXNpZ25lciIgfCBqb2JzX2RmJEpvYiA9PSAidGVzdF9lbmdpbmVlciIgfGpvYnNfZGYkSm9iID09ICJzaXRlX3JlbGlhYmlsaXR5X2VuZ2luZWVyIiB8am9ic19kZiRKb2IgPT0gImRhdGFfYXJjaGl0ZWN0IiB8am9ic19kZiRKb2IgPT0gImRhdGFfc2NpZW50aXN0IiB8am9ic19kZiRKb2IgPT0gInNvZnR3YXJlX2RldmVsb3BlciIgfGpvYnNfZGYkSm9iID09ICJzdGF0aXN0aWNpYW4iIHxqb2JzX2RmJEpvYiA9
PSAiZGVlcF9sZWFybmluZyIgfGpvYnNfZGYkSm9iID09ICJtYWNoaW5lX2xlYXJuaW5nX2VuZ2luZWVyIiB8am9ic19kZiRKb2IgPT0gImJ1c2luZXNzX2FuYWx5c3QiLF0KCnNhbXBsaW5nID0gZnVuY3Rpb24ocmVzdW1lX2lzX3N0ZW0sIE4pIHsKICAjaWYgdGhlIHJlc3VtZSBpcyBmbGFnZ2VkIGFzIHN0ZW0KICBpZihyZXN1bWVfaXNfc3RlbSA9PSBGQUxTRSl7CiAgICAKICAgICNyYW5kb21seSBzYW1wbGUgbiBqb2JzIGZyb20gdGhlIG1hc3RlciBkZgogICAgc2FtcGxlX2pvYl9kZXNjcmlwdGlvbnMgPC0gc2FtcGxlX24obm9uX3N0ZW1fam9ic19kZiwgTikKICAgIAogICAgI2NyZWF0ZSBuZXcgY29sIHRvIHJlcHJlc2VudCBpZiB0aGUgdXNlciBsaWtlZCB0aGUgam9iIG9yIG5vdAogICAgc2FtcGxlX2pvYl9kZXNjcmlwdGlvbnMkbGlrZWRfb3Jfbm90IDwtIE5BCiAgICAKICAgICNsb29wIHRocnUgdGhlIGRmLCBzaG93IHRoZSB1c2VyIGEgam9iIGRlc2MgYW5kIGFzayB0aGVtIGlmIHRoZXkgbGlrZSBpdC4gSWYgdGhleSBsaWtlIGl0LCBhc3NpZ24gYSB2YWx1ZSBvZiAxIHRvIHRoZSBsaWtlZF9vcl9ub3QgY29sLCBpZiB0aGV5ICAgIGRvbid0IGFzc2lnbiBhIHZhbHVlIG9mIDAKICAgIGZvciAoaW5kZXggaW4gMTpOKSB7CiAgICAgIGpvYl9kZXNjIDwtIHN0cihzYW1wbGVfam9iX2Rlc2NyaXB0aW9ucyREZXNjcmlwdGlvbltpbmRleF0pCiAgICAgIHVzZXJfaW5wdXQgPC0gcmVhZGxpbmUocHJvbXB0PSBqb2JfZGVzYykKICAgICAgaWYodXNlcl9pbnB1dCA9PSAieSIpewogICAgICAgIHNhbXBsZV9qb2JfZGVzY3JpcHRpb25zJGxpa2VkX29yX25vdFtpbmRleF0gPSAxCiAgICAgIH1lbHNlewogICAgICAgIHNhbXBsZV9qb2JfZGVzY3JpcHRpb25zJGxpa2VkX29yX25vdFtpbmRleF0gPSAwCiAgICAgIH0KICAgIH0gCiAgICAjcmVwZWF0IHByb2Nlc3MgZm9yIHN0ZW0KICB9ZWxzZXsKICAgIHNhbXBsZV9qb2JfZGVzY3JpcHRpb25zIDwtIHNhbXBsZV9uKHN0ZW1fam9ic19kZiwgTikKICAgIHNhbXBsZV9qb2JfZGVzY3JpcHRpb25zJGxpa2VkX29yX25vdCA8LSBOQQogICAgZm9yIChpbmRleCBpbiAxOk4pIHsKICAgICAgam9iX2Rlc2MgPC0gc3RyKHNhbXBsZV9qb2JfZGVzY3JpcHRpb25zJERlc2NyaXB0aW9uW2luZGV4XSkKICAgICAgdXNlcl9pbnB1dCA8LSByZWFkbGluZShwcm9tcHQ9IGpvYl9kZXNjKQogICAgICBpZih1c2VyX2lucHV0ID09ICJ5Iil7CiAgICAgICAgc2FtcGxlX2pvYl9kZXNjcmlwdGlvbnMkbGlrZWRfb3Jfbm90W2luZGV4XSA9IDEKICAgICAgICBwcmludChzYW1wbGVfam9iX2Rlc2NyaXB0aW9ucyRsaWtlZF9vcl9ub3RbaW5kZXhdKQogICAgICB9ZWxzZXsKICAgICAgICBzYW1wbGVfam9iX2Rlc2NyaXB0aW9ucyRsaWtlZF9vcl9ub3RbaW5kZXhdID0gMAogICAgICB9CiAgICB9CiAgfQogIHJldHVybihzYW1wbGVfam9iX2Rlc2NyaXB0aW9ucykKfQoKCiNtYWtpbmcgYSB0cmFpbmluZyBzZXQg
CnRyYWluID0gc2FtcGxpbmcocmVzdW1lX2lzX3N0ZW0sIDEwKQp0cmFpbiA9IHRva2VuaXplLmRmKHRyYWluKQoKI25lZWQgZGYgd2l0aCAyIGNvbHMsIDEgaXMgeWVzL25vLCBhbmQgdGhlIG90aGVyIGlzIGEgdG9rZW4gY29sdW1uIHRoYXQgbGlzdHMgdGhlIHRva2VuCgoKYGBgCgoKCgoKCgoKYGBge3J9CiNnZXR0aW5nIHRlc3Qgam9icwp0ZXN0X2pvYnMgPC0gc2FtcGxlX24obm9uX3N0ZW1fam9ic19kZiwgNSkKdGVzdF9qb2JzID0gdG9rZW5pemUuZGYodGVzdF9qb2JzKQoKI3NwbGl0dGluZyBpbnRvIFl+WAp5X3RyYWluID0gdHJhaW5bLDZdCnhfdHJhaW4gPSB0cmFpblssNzpsZW5ndGgodHJhaW4pXQoKeF90ZXN0ID0gdGVzdF9qb2JzWywgNjpsZW5ndGgodGVzdF9qb2JzKV0gCgp4X3RyYWluID0gYXMuZGF0YS5mcmFtZShhcy5tYXRyaXgoeF90cmFpbikpCgpmb3IoaSBpbiBuYW1lcyh4X3RyYWluKSkgewogIHByaW50KGkpCiAgeF90cmFpbltpLF0gPSBhcy5pbnRlZ2VyKHhfdHJhaW5baSxdKQp9CgpuYW1lID0gYXBwbHkoeF90cmFpbiwyLHN1bSkgPiA1CnhfdHJhaW4gPSB4X3RyYWluWyxuYW1lXQoKCm1vZCA8LSBnbG0oeV90cmFpbiB+IC4gLCBkYXRhID0geF90cmFpbiwgICBmYW1pbHk9Ymlub21pYWwobGluaz0ibG9naXQiKSkKdGVzdF9wcm9icyA8LSBwcmVkaWN0KG1vZCwgbmV3ZGF0YT14X3Rlc3QsIHR5cGU9InJlc3BvbnNlIikKCgoKbW9kIDwtIGdsbSh5X3RyYWluIH4gLiAsIGRhdGEgPSB4X3RyYWluLCAgIGZhbWlseT1iaW5vbWlhbChsaW5rPSJsb2dpdCIpKQoKY2xhc3MoeV90cmFpbikKbGlicmFyeShnbG1uZXQpCgpsYXNzb19jdiA8LSBjdi5nbG1uZXQoeF90cmFpbiwgdHJhaW4kbGlrZWRfb3Jfbm90LCBhbHBoYT0xKQpjb2VmKGxhc3NvX2N2KQpsYXNzb19tb2QgPC0gZ2xtbmV0KHhfdHJhaW4sIHlfdHJhaW4sIGFscGhhPTEsIGxhbWJkYT1sYXNzb19jdiRsYW1iZGEuMXNlKQpjb2VmKGxhc3NvX21vZCkKYGBgCgoKCgoKCgoKCgoKCg==